PyTorch walkthrough generated by modifying and combining several tutorials.
This notebook will walk through:
Dependencies (tested on):
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.utils.data as Data
import torchvision
import matplotlib.pyplot as plt
%matplotlib inline
# If you have a GPU set this to True!
USE_CUDA = False

torch.manual_seed(1)  # reproducible runs

# Hyper parameters
EPOCH = 1              # train the training data n times; 1 epoch keeps the demo fast
BATCH_SIZE = 50
LR = 0.001             # learning rate for Adam
DOWNLOAD_MNIST = True  # set to False once you have downloaded the data

# MNIST digits dataset
train_data = torchvision.datasets.MNIST(
    root='./mnist/',
    train=True,                                   # this is the training split
    transform=torchvision.transforms.ToTensor(),  # PIL.Image / numpy.ndarray -> FloatTensor
                                                  # of shape (C x H x W), normalized to [0.0, 1.0]
    download=DOWNLOAD_MNIST,                      # download it if you don't have it
)

# plot one example
print(train_data.train_data.size())    # (60000, 28, 28)
print(train_data.train_labels.size())  # (60000)
i = 5
plt.imshow(train_data.train_data[i].numpy(), cmap='gray')
plt.title('%i' % train_data.train_labels[i])
plt.show()

# Data Loader for easy mini-batch return in training; image batch shape is (50, 1, 28, 28)
train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

# Convert test data into a Variable; pick 2000 samples to speed up testing.
test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)
# shape (2000, 28, 28) -> (2000, 1, 28, 28), values scaled into [0, 1]
test_x = Variable(torch.unsqueeze(test_data.test_data, dim=1)).type(torch.FloatTensor)[:2000] / 255.
test_y = test_data.test_labels[:2000]
class CNN(nn.Module):
    """Two conv blocks (Conv2d -> ReLU -> MaxPool2d) followed by a linear
    classifier, sized for 28x28 single-channel MNIST digits."""

    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(           # input shape (1, 28, 28)
            nn.Conv2d(
                in_channels=1,                # input channels (grayscale)
                out_channels=16,              # n_filters
                kernel_size=5,                # filter size
                stride=1,                     # filter movement/step
                padding=2,                    # keep 28x28: padding = (kernel_size - 1) / 2 when stride=1
            ),                                # output shape (16, 28, 28)
            nn.ReLU(),                        # activation (was lost into a comment in the original)
            nn.MaxPool2d(kernel_size=2),      # max over 2x2 area -> (16, 14, 14)
        )
        self.conv2 = nn.Sequential(           # input shape (16, 14, 14)
            nn.Conv2d(16, 32, 5, 1, 2),       # output shape (32, 14, 14)
            nn.ReLU(),                        # activation
            nn.MaxPool2d(2),                  # output shape (32, 7, 7)
        )
        self.out = nn.Linear(32 * 7 * 7, 10)  # fully connected layer, 10 classes

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)             # flatten conv2 output to (batch_size, 32 * 7 * 7)
        output = self.out(x)
        return output, x                      # also return the flat features for visualization
# Instantiate the network and show its architecture.
cnn = CNN()
print(cnn)

# Adam over all network parameters; CrossEntropyLoss expects integer class
# targets, not one-hot vectors.
optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)
loss_func = nn.CrossEntropyLoss()
# The following function (plot_with_labels) is for visualization; can be
# ignored if not interested.
from matplotlib import cm

try:
    from sklearn.manifold import TSNE
    HAS_SK = True
except ImportError:  # narrowed from a bare except: only missing sklearn is expected here
    HAS_SK = False
    print('Please install sklearn for layer visualization')


def plot_with_labels(lowDWeights, labels):
    """Scatter a 2-D embedding, drawing each point as its digit label.

    lowDWeights: (n, 2) array of 2-D coordinates.
    labels: digit labels (0-9) used for both text and color.
    """
    plt.cla()
    X, Y = lowDWeights[:, 0], lowDWeights[:, 1]
    for x, y, s in zip(X, Y, labels):
        c = cm.rainbow(int(255 * s / 9))  # map digit 0-9 across the rainbow colormap
        plt.text(x, y, s, backgroundcolor=c, fontsize=9)
    plt.xlim(X.min(), X.max())
    plt.ylim(Y.min(), Y.max())
    plt.title('Visualize last layer')
    plt.show()
    plt.pause(0.01)
plt.ion()

# training and testing
for epoch in range(EPOCH):
    for step, (x, y) in enumerate(train_loader):  # batch data; x is normalized by ToTensor
        b_x = Variable(x)  # batch x
        b_y = Variable(y)  # batch y

        output = cnn(b_x)[0]           # cnn output (logits only)
        loss = loss_func(output, b_y)  # cross entropy loss
        optimizer.zero_grad()          # clear gradients for this training step
        loss.backward()                # backpropagation, compute gradients
        optimizer.step()               # apply gradients

        if step % 100 == 0:
            test_output, last_layer = cnn(test_x)
            pred_y = torch.max(test_output, 1)[1].data.squeeze()
            accuracy = float((pred_y == test_y).sum()) / float(test_y.size(0))
            # loss.item() replaces the deprecated loss.data[0], which raises
            # on 0-dim tensors in torch >= 0.4.
            print('Epoch: ', epoch, '| train loss: %.4f' % loss.item(), '| test accuracy: %.2f' % accuracy)
            if HAS_SK:
                # Visualization of trained flatten layer (T-SNE)
                tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
                plot_only = 500
                low_dim_embs = tsne.fit_transform(last_layer.data.numpy()[:plot_only, :])
                labels = test_y.numpy()[:plot_only]
                plot_with_labels(low_dim_embs, labels)
plt.ioff()
# print 10 predictions from test data
test_output, _ = cnn(test_x[:10])
pred_y = torch.max(test_output, 1)[1].data.numpy().squeeze()
print(pred_y, 'prediction number')
print(test_y[:10].numpy(), 'real number')
# [todo]: Statistics/confusion matrix using sklearn
import scipy.misc  # NOTE(review): unused below and removed in scipy >= 1.12 — consider deleting
from PIL import Image
import json
from torchvision import models
from torchvision import transforms, utils
import numpy as np
def to_grayscale(image):
    """Convert a 3-D (channels, w, h) image tensor to a 2-D grayscale image
    by averaging over the channel dimension.

    The original divided by image.shape[0] *after* summing over dim 0, i.e.
    by the width rather than the channel count; the channel count is captured
    before the reduction here.
    """
    channels = image.shape[0]
    image = torch.sum(image, dim=0)
    return torch.div(image, channels)
def normalize(image):
    """Resize a PIL image to 224x224, convert to a tensor, apply ImageNet
    mean/std normalization, and wrap it in a (1, 3, 224, 224) Variable
    (on the GPU when USE_CUDA is set)."""
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],  # ImageNet channel means
        std=[0.229, 0.224, 0.225],   # ImageNet channel stds
    )
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ])
    # The fused "else:image = ..." line in the original is split back out here.
    if USE_CUDA:
        image = Variable(preprocess(image).unsqueeze(0).cuda())
    else:
        image = Variable(preprocess(image).unsqueeze(0))
    return image
def predict(image):
    """Run `image` (a (1, 3, 224, 224) Variable) through the global vgg net
    and return (class_index_str, human_readable_label) from the global
    `labels` ImageNet mapping."""
    _, index = vgg(image).data[0].max(0)
    # .item() works for both the 0-dim tensor modern torch returns from
    # max(0) and the 1-element tensor older versions returned, where the
    # original index[0] raises IndexError on a 0-dim tensor.
    idx = str(index.item())
    return idx, labels[idx][1]
def deprocess(image):
    """Undo ImageNet mean/std normalization on a channels-last image tensor
    so it can be displayed with imshow."""
    std = torch.Tensor([0.229, 0.224, 0.225])
    mean = torch.Tensor([0.485, 0.456, 0.406])
    if USE_CUDA:
        std, mean = std.cuda(), mean.cuda()
    return image * std + mean
def load_image(path):
    """Open the image file at `path` with PIL, display it, and return it."""
    img = Image.open(path)
    plt.imshow(img)
    plt.title("Image loaded successfully")
    return img
# Load a sample image and a pretrained VGG-16 ImageNet classifier.
kitten_1 = load_image("./images/Tongue-Kitten.jpg")

# NOTE(review): `pretrained=True` is deprecated in newer torchvision in favor
# of the `weights=` argument; kept for compatibility with this tutorial's API.
vgg = models.vgg16(pretrained=True)
if USE_CUDA:
    vgg = vgg.cuda()  # move the model to the GPU (indentation was lost in the original)
print(vgg)

# ImageNet class index -> [wordnet_id, human-readable label] mapping.
labels = json.load(open('labels/imagenet_class_index.json'))

kitten_2 = normalize(kitten_1)
print(predict(kitten_2))

# Flat list of every layer in vgg.features; entry 0 is the Sequential
# container itself, so downstream code iterates modulelist[1:].
modulelist = list(vgg.features.modules())
def layer_outputs(image):
    """Feed `image` through every layer of vgg.features, plot each
    intermediate activation as a grayscale image, and save the grid to
    'layer_outputs.jpg'."""
    outputs = []
    names = []
    # modulelist[0] is the whole Sequential container, so start from index 1.
    for layer in modulelist[1:]:
        image = layer(image)
        outputs.append(image)
        names.append(str(layer))

    output_im = []
    for activation in outputs:
        activation = activation.squeeze(0)  # drop the batch dimension
        output_im.append(to_grayscale(activation).data.cpu().numpy())

    fig = plt.figure()
    plt.rcParams["figure.figsize"] = (30, 50)
    for i in range(len(output_im)):
        a = fig.add_subplot(8, 4, i + 1)  # VGG-16 features has 31 layers -> 8x4 grid
        plt.imshow(output_im[i])
        plt.axis('off')
        a.set_title(names[i].partition('(')[0], fontsize=30)
    plt.savefig('layer_outputs.jpg', bbox_inches='tight')


layer_outputs(kitten_2)
def filter_outputs(image, layer_to_visualize):
    """Plot the per-filter activations of one layer of vgg.features.

    layer_to_visualize: 0-based layer index; may be negative to index from
    the end (Python-style).
    """
    if layer_to_visualize < 0:
        # modulelist[0] is the container, so there are len(modulelist) - 1
        # real layers (the original hard-coded 31, which is VGG-16 specific).
        layer_to_visualize += len(modulelist) - 1

    output = None
    name = None
    for count, layer in enumerate(modulelist[1:]):
        image = layer(image)
        if count == layer_to_visualize:
            output = image
            name = str(layer)

    filters = []
    output = output.data.squeeze()
    for i in range(output.shape[0]):
        filters.append(output[i, :, :])

    fig = plt.figure()
    plt.rcParams["figure.figsize"] = (10, 10)
    side = int(np.sqrt(len(filters)))  # largest square grid that fits
    for i in range(side * side):
        # add_subplot requires integer arguments; the original passed the raw
        # np.sqrt floats, which newer matplotlib rejects.
        fig.add_subplot(side, side, i + 1)
        plt.imshow(filters[i])
        plt.axis('off')


# print(len(filters))  # print(filters[0].shape)  # print(output.shape)
filter_outputs(kitten_2, 0)
filter_outputs(kitten_2, -1)
# ImageNet preprocessing pipeline shared by the saliency-map helpers below:
# resize to the VGG input size, tensorize, then mean/std-normalize.
normalise = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalise,
])
def make_saliency_map(input, label):
    """Plot a vanilla-gradient saliency map of `input` (a PIL image) for
    ImageNet class `label`, next to the de-normalized original image.
    Returns the 2-D gradient map.

    NOTE(review): the parameter shadows the builtin input(); kept so the
    call signature stays unchanged.
    """
    # The fused "else:input = ..." line in the original is split back out here.
    if USE_CUDA:
        input = Variable(preprocess(input).unsqueeze(0).cuda(), requires_grad=True)
    else:
        input = Variable(preprocess(input).unsqueeze(0), requires_grad=True)

    output = vgg.forward(input)
    output[0][label].backward()  # gradient of the class score w.r.t. the input pixels

    grads = input.grad.data.clamp(min=0)  # keep only positive influence
    grads.squeeze_()
    grads.transpose_(0, 1)  # (c, h, w) -> (h, w, c)
    grads.transpose_(1, 2)
    grads = np.amax(grads.cpu().numpy(), axis=2)  # max over channels -> 2-D map

    true_image = input.data.squeeze()
    true_image = true_image.transpose(0, 1)  # channels last for imshow
    true_image = true_image.transpose(1, 2)
    true_image = deprocess(true_image)

    fig = plt.figure()
    plt.rcParams["figure.figsize"] = (20, 20)
    a = fig.add_subplot(1, 2, 1)
    plt.imshow(true_image)
    plt.title('Original Image')
    plt.axis('off')
    a = fig.add_subplot(1, 2, 2)
    plt.imshow(grads)
    plt.axis('off')
    plt.title('Saliency Map')
    return grads
# 207 and 1 are ImageNet class indices — presumably golden retriever and
# goldfish respectively; verify against labels/imagenet_class_index.json.
dog = load_image('images/Golden_retr.jpg')
dog_sal = make_saliency_map(dog, 207)
goldfish = load_image('images/goldfish.jpg')
goldfish_sal = make_saliency_map(goldfish, 1)
def smooth_grad(input, label, x=10, percent_noise=10):
    """SmoothGrad saliency map: average vanilla gradients over `x` noisy
    copies of `input` (a PIL image) for ImageNet class `label`.

    The apparent noise one sees in a sensitivity map may be due to
    essentially meaningless local variations in partial derivatives — given
    typical training techniques there is no reason to expect derivatives to
    vary smoothly — so averaging over noisy samples smooths the map.

    x: number of noisy samples to average.
    percent_noise: noise stddev as a percentage of the input value range.
    Returns the 2-D gradient map.
    """
    if USE_CUDA:
        tensor_input = torch.from_numpy(np.array(input)).type(torch.cuda.FloatTensor)  # shape (w, h, c)
    else:
        tensor_input = torch.from_numpy(np.array(input)).type(torch.FloatTensor)  # shape (w, h, c)

    # Accumulator for the gradients of all samples.
    if USE_CUDA:
        final_grad = torch.zeros((1, 3, 224, 224)).cuda()
    else:
        final_grad = torch.zeros((1, 3, 224, 224))

    for i in range(x):
        print('Sample:', i + 1)
        temp_input = tensor_input
        # Per the paper, noise level corresponds to stddev / (xmax - xmin),
        # hence stddev = percent_noise * (max - min) / 100.
        scale = (percent_noise / 100) * (tensor_input.max() - tensor_input.min())
        if USE_CUDA:
            noise = torch.from_numpy(np.random.normal(loc=0, scale=scale, size=temp_input.shape)).type(torch.cuda.FloatTensor)
        else:
            noise = torch.from_numpy(np.random.normal(loc=0, scale=scale, size=temp_input.shape)).type(torch.FloatTensor)

        temp_input = (temp_input + noise).cpu().numpy()
        temp_input = Image.fromarray(temp_input.astype(np.uint8))
        if USE_CUDA:
            temp_input = Variable(preprocess(temp_input).unsqueeze(0).cuda(), requires_grad=True)
        else:
            temp_input = Variable(preprocess(temp_input).unsqueeze(0), requires_grad=True)

        output = vgg.forward(temp_input)
        output[0][label].backward()
        final_grad += temp_input.grad.data

    grads = final_grad / x
    grads = grads.clamp(min=0)
    grads.squeeze_()
    grads.transpose_(0, 1)  # (c, h, w) -> (h, w, c)
    grads.transpose_(1, 2)
    grads = np.amax(grads.cpu().numpy(), axis=2)  # max over channels -> 2-D map

    true_image = normalize(input)
    true_image = true_image.squeeze()
    true_image = true_image.transpose(0, 1)  # channels last for imshow
    true_image = true_image.transpose(1, 2)
    true_image = deprocess(true_image.data)

    fig = plt.figure()
    plt.rcParams["figure.figsize"] = (20, 20)
    a = fig.add_subplot(1, 2, 1)
    plt.imshow(true_image)
    plt.title('Original Image')
    plt.axis('off')
    a = fig.add_subplot(1, 2, 2)
    plt.imshow(grads)
    plt.axis('off')
    plt.title('SmoothGrad, Noise: ' + str(percent_noise) + '%, ' + 'Samples: ' + str(x))
    return grads
# Compare vanilla saliency vs SmoothGrad for the two sample images.
dog_sg = load_image('images/Golden_retr.jpg')
# Class 207 matches the earlier dog saliency call; the original passed 1
# (a different ImageNet class) here, which looks like a copy-paste slip.
dog_sal = make_saliency_map(dog_sg, 207)
dog_sg_sal = smooth_grad(dog_sg, 207, 30, 10)

goldfish_sg = load_image('images/goldfish.jpg')
goldfish_sal = make_saliency_map(goldfish_sg, 1)  # fixed the 'godlfish_sal' typo
goldfish_sg_sal = smooth_grad(goldfish_sg, 1, 30, 10)